# 加载markdown文件
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import CSVLoader

# Read the Markdown file as plain text and print the resulting documents.
# The encoding is pinned to UTF-8 because, when no `encoding` is given,
# TextLoader decodes with the platform's default codec, which can fail on or
# garble non-ASCII text on systems whose locale is not UTF-8.
# NOTE(review): assumes the sample file is saved as UTF-8 - confirm.
loader = TextLoader("data/example_markdown.md", encoding="utf-8")
data = loader.load()
print(data)

# Load a CSV file and print the resulting documents.
# The encoding is given explicitly so the result does not depend on the
# platform's default codec (non-UTF-8 locales would otherwise mis-decode
# non-ASCII cells).
# NOTE(review): assumes the sample CSV is saved as UTF-8 - confirm.
loader = CSVLoader(file_path="data/example_csv.csv", encoding="utf-8")
data = loader.load()
print(data)

# 加载Excel
from langchain_community.document_loaders import DirectoryLoader
# The data directory contains Excel workbooks; load every .xlsx file in it.
# Prerequisite: pip install unstructured[xlsx]
# Because of the "*.xlsx" glob, other files in the directory (e.g. .html or
# .rst) are not picked up by this loader.
loader = DirectoryLoader(path="./data", glob="*.xlsx")
docs = loader.load()
print(f"文件数量：{len(docs)}")

# 加载HTML
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_community.document_loaders import BSHTMLLoader
# Load the same HTML file with two different loaders, one after the other,
# printing the documents each one produces so the outputs can be compared.
html_path = "./data/example_html.html"
for loader_cls in (UnstructuredHTMLLoader, BSHTMLLoader):
    # Construct each loader only when its turn comes, then load and print.
    loader = loader_cls(html_path)
    data = loader.load()
    print(data)


# 加载JSON
# !pip install jq
from langchain_community.document_loaders import JSONLoader
# Use the jq expression ".messages[]" to select the entries to load from the
# JSON file; text_content=False indicates the selected values are not
# required to be plain strings.
loader = JSONLoader(
    file_path="./data/example_json.json",
    jq_schema=".messages[]",
    text_content=False,
)
data = loader.load()
print(data)




